knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, fig.align = 'center')

reads pergene

library(GenomicAlignments)
library(data.table)
library(Biostrings)
library(tidyverse)

BamFile <- "/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex/data/alignment/20200620/minimap2/20200620.sorted.bam"
scanBamWhat()
bam <- GenomicAlignments::readGAlignments(file = BamFile, param = Rsamtools::ScanBamParam(what = c("qname", "flag", "mapq")))
bam

table(mcols(bam[with(bam, flag != 256)])$mapq)
table(mcols(bam[with(bam, mapq == 60)])$flag)

table(with(bam, mapq == 60 & flag == 0)) %>% knitr::kable()
mean(with(bam, mapq == 60 & flag == 0))

bam <- bam[with(bam, mapq == 60 & flag == 0)]
sort(table(seqnames(bam)))
openxlsx::getSheetNames("/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex/data/Meta/Barcode_and_RNA_information.xlsx")
library(openxlsx)
Mat <- openxlsx::read.xlsx("/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex/data/Meta/Barcode_and_RNA_information.xlsx", sheet = 2)
Mat <- Mat[, c(1, 2, 5)]
Mat <- data.table(Barcode = Mat$Barcode, 
                  Name = Mat$Name, 
                  Gene = gsub("\\(与后期不一样\\)", "", Mat$Gene.Name))
read2gene <- data.table(read = mcols(bam)$qname, gene = as.character(seqnames(bam)))
read2gene <- merge(read2gene, Mat, by.x = "gene", by.y = "Gene")
sort(table(read2gene$Barcode))

Mat <- merge(Mat, as.data.frame(sort(table(read2gene$gene), decreasing = TRUE)), by.x = "Gene", by.y = "Var1", all.x = TRUE)
setkey(Mat, Barcode)
fwrite(x = read2gene, file = "/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex/analysis/02.BamReadsSplit/read2gene_20200620.txt", row.names = F, quote = F, sep = "\t")
write.xlsx(Mat, "/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex/analysis/02.BamReadsSplit/reads_pergene_20200620.xlsx")

split fastq

library(ShortRead)
reads <- ShortRead::readFastq("/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex/data/BaseCall/20200620/guppy/fast5_pass.fastq")
reads_id <- mapply(function(x) x[1], strsplit(as.character(id(reads)), " "))

stopifnot(all(read2gene$read %in% reads_id))

for(g in unique(read2gene$Barcode)) {
  reads_g <- reads[which(reads_id %in% read2gene[Barcode == g, read])]
  reads_gi <- sort(mapply(function(x) x[1], strsplit(as.character(id(reads_g)), " ")))
  stopifnot(identical(reads_gi, sort(read2gene[Barcode == g, read])))
  ShortRead::writeFastq(reads_g, file = paste0("/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex/analysis/02.BamReadsSplit/20200620/", g, ".fq.gz"))
}

Add seq width

reads_gs <- reads[which(reads_id %in% read2gene[, read])]

reads_gsw <- width(reads_gs)
names(reads_gsw) <- mapply(function(x) x[1], strsplit(as.character(id(reads_gs)), " "))

read2gene$width <- reads_gsw[read2gene$read]

fwrite(x = read2gene, file = "/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex/analysis/02.BamReadsSplit/read2gene_20200620.txt", row.names = F, quote = F, sep = "\t")


tangchao7498/DecodeR documentation built on May 27, 2023, 7:21 p.m.